package spark.sql

import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
 * Spark SQL word-count demo (version 1.0).
 *
 * @author Jeremy Zheng
 * @since 2021/3/29
 */
object SparkSQL06_wordcount_Demo1 {
  /**
   * Word-count demo: reads a comma-separated word file, computes per-word
   * counts with the DataFrame API, registers the result as two temp views,
   * and uses Spark SQL to select words occurring more than once, casting the
   * count to a string so the printed schema shows the type change.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {

    // Set up a local Spark session.
    val spark: SparkSession = SparkSession.builder().appName("wcDemo1").master("local").getOrCreate()
    import spark.implicits._

    try {
      // Business logic: split every line on commas into individual words.
      // The word dataset is built once and reused; the original rebuilt the
      // same flatMap pipeline twice.
      val ds1: Dataset[String] = spark.read.textFile("dataSet/wordCount.txt")
      val words: Dataset[String] = ds1.flatMap(_.split(","))

      // groupBy("word").count() counts rows per group, so the original's
      // extra constant-1 column ("one") was never read and has been dropped.
      // Both former pipelines reduced to this same computation, so a single
      // counts DataFrame now backs both temp views; query output is unchanged.
      val counts: DataFrame = words.toDF("word").groupBy("word").count()

      counts.createOrReplaceTempView("v_result1")
      counts.createOrReplaceTempView("v_result2")

      // Words appearing more than once; `count` is cast to string so
      // printSchema shows `counts: string`.
      val whereDF1: DataFrame = spark.sql("select word,cast(count as  string) counts from v_result1 where count>1")
      whereDF1.show()
      whereDF1.printSchema()

      val whereDF2: DataFrame = spark.sql("select word,cast(count as  string) counts from v_result2 where count>1")
      whereDF2.show()
      whereDF2.printSchema()
    } finally {
      // Release the Spark session even if a query above throws.
      spark.close()
    }
  }
}
